In [ ]:
You are free to use or adapt this notebook for any purpose you'd like. However, please respect the Simplified BSD License that governs its use.
Twitter implements OAuth 1.0A as its standard authentication mechanism, and in order to use it to make requests to Twitter's API, you'll need to go to https://dev.twitter.com/apps and create a sample application.
Choose any name for your application, write a description and use http://google.com
for the website.
Under Keys and Access Tokens, there are four primary identifiers you'll need to note for an OAuth 1.0A workflow:
Note that you will need an ordinary Twitter account in order to login, create an app, and get these credentials.
The first time you execute the notebook, add all credentials so that you can save them in the pkl
file; afterwards you can remove the secret keys from the notebook, because they will simply be loaded from the pkl
file.
The pkl
file contains sensitive information that can be used to take control of your Twitter account; do not share it.
In [ ]:
import pickle
import os
In [ ]:
# Load Twitter API credentials from a local pickle file, creating a
# blank template on the first run so the secrets never have to live in
# the notebook itself.
# NOTE: secret_twitter_credentials.pkl grants full control of the
# account -- never share or commit it.
if not os.path.exists('secret_twitter_credentials.pkl'):
    Twitter = {}
    Twitter['Consumer Key'] = ''
    Twitter['Consumer Secret'] = ''
    Twitter['Access Token'] = ''
    Twitter['Access Token Secret'] = ''
    with open('secret_twitter_credentials.pkl', 'wb') as f:
        pickle.dump(Twitter, f)
else:
    # Use a context manager so the file handle is closed promptly;
    # the original `pickle.load(open(...))` left it dangling.
    with open('secret_twitter_credentials.pkl', 'rb') as f:
        Twitter = pickle.load(f)
Install the twitter
package to interface with the Twitter API
In [ ]:
!pip install twitter
In [ ]:
# Build an OAuth 1.0A handle for Twitter's API using the four
# credentials loaded into the `Twitter` dict above.
import twitter

# The twitter package takes the tokens in this order: access token,
# access token secret, consumer key, consumer secret.
auth = twitter.oauth.OAuth(Twitter['Access Token'],
                           Twitter['Access Token Secret'],
                           Twitter['Consumer Key'],
                           Twitter['Consumer Secret'])
twitter_api = twitter.Twitter(auth=auth)
# Nothing to see by displaying twitter_api except that it's now a
# defined variable
print(twitter_api)
Twitter identifies locations using the Yahoo! Where On Earth ID.
The Yahoo! Where On Earth ID for the entire world is 1. See https://dev.twitter.com/docs/api/1.1/get/trends/place and http://developer.yahoo.com/geo/geoplanet/,
or look at the BOSS PlaceFinder here: https://developer.yahoo.com/boss/placefinder/
In [ ]:
# Yahoo! Where On Earth IDs: 1 denotes the entire world.
WORLD_WOE_ID = 1
# WOEID for the United States.
US_WOE_ID = 23424977
Look up the WOEID for San Diego.
You can change it to another location.
In [ ]:
# WOEID for San Diego; change this constant to explore another location.
LOCAL_WOE_ID=2487889
# Prefix ID with the underscore for query string parameterization.
# Without the underscore, the twitter package appends the ID value
# to the URL itself as a special case keyword argument.
world_trends = twitter_api.trends.place(_id=WORLD_WOE_ID)
us_trends = twitter_api.trends.place(_id=US_WOE_ID)
local_trends = twitter_api.trends.place(_id=LOCAL_WOE_ID)
In [ ]:
world_trends[:2]
In [ ]:
# Explore the structure of a trends response.
trends=local_trends
print(type(trends))
# The first element is a mapping; show its keys...
print(list(trends[0].keys()))
# ...and the list of trend entries stored under 'trends'.
print(trends[0]['trends'])
In [ ]:
import json
print((json.dumps(us_trends[:2], indent=1)))
In [ ]:
# Collect the trend names for each location as sets so we can compare
# them with set operations later.
trends_set = {}
trends_set['world'] = {trend['name']
                       for trend in world_trends[0]['trends']}
trends_set['us'] = {trend['name']
                    for trend in us_trends[0]['trends']}
trends_set['san diego'] = {trend['name']
                           for trend in local_trends[0]['trends']}
In [ ]:
# Print a separator tuple for each location followed by its
# comma-joined trend names.
for region in ('world', 'us', 'san diego'):
    print(('-' * 10, region))
    print((','.join(trends_set[region])))
In [ ]:
# Trends shared between locations, computed with the set `&` operator.
print(('='*10, 'intersection of world and us'))
print((trends_set['world'] & trends_set['us']))
print(('='*10, 'intersection of us and san-diego'))
print((trends_set['san diego'] & trends_set['us']))
Set the variable q
to a trending topic,
or anything else for that matter. The example query below
was a trending topic when this content was being developed
and is used throughout the remainder of this chapter
In [ ]:
# Search query: a topic that was trending when this notebook was written.
q = '#MTVAwards'
# How many tweets to request from the search endpoint.
number = 100
# See https://dev.twitter.com/docs/api/1.1/get/search/tweets
search_results = twitter_api.search.tweets(q=q, count=number)
# The tweets themselves live under the 'statuses' key of the response.
statuses = search_results['statuses']
In [ ]:
# `len(statuses)` on its own line is discarded here because only a
# cell's last expression is displayed -- print it explicitly.
print(len(statuses))
# Preview a single raw status instead of dumping the whole list,
# which floods the notebook output with ~100 nested dicts.
print(statuses[:1])
Twitter often returns duplicate results; we can filter them out by checking for duplicate texts:
In [ ]:
# De-duplicate the search results by tweet text, preserving order.
# A set gives O(1) membership checks; the original used a list, making
# the loop accidentally O(n^2).
seen_texts = set()
filtered_statuses = []
for s in statuses:
    if s["text"] not in seen_texts:
        filtered_statuses.append(s)
        seen_texts.add(s["text"])
statuses = filtered_statuses
In [ ]:
len(statuses)
In [ ]:
[s['text'] for s in search_results['statuses']]
In [ ]:
# Show one sample search result by slicing the list...
print(json.dumps(statuses[0], indent=1))
In [ ]:
# The result of the list comprehension is a list with only one element that
# can be accessed by its index and set to the variable t
# Grab the first status and explore the retweet-related fields of the
# data structure. (Removed leftover commented-out lookup of a stale,
# hard-coded tweet ID.)
t = statuses[0]
print(t['retweet_count'])
print(t['retweeted'])
In [ ]:
# Pull out the pieces we want to count: tweet texts, mentioned screen
# names, hashtags, and every whitespace-separated word.
status_texts = [status['text'] for status in statuses]

screen_names = [mention['screen_name']
                for status in statuses
                for mention in status['entities']['user_mentions']]

hashtags = [tag['text']
            for status in statuses
            for tag in status['entities']['hashtags']]

# Compute a collection of all words from all tweets.
words = [word
         for text in status_texts
         for word in text.split()]
In [ ]:
# Explore the first 5 items for each...
print(json.dumps(status_texts[0:5], indent=1))
print(json.dumps(screen_names[0:5], indent=1))
print(json.dumps(hashtags[0:5], indent=1))
print(json.dumps(words[0:5], indent=1))
In [ ]:
from collections import Counter
# Show the ten most frequent entries in each collection.
for collection in (words, screen_names, hashtags):
    counts = Counter(collection)
    print(counts.most_common(10))  # top 10
    print()
In [ ]:
def prettyprint_counts(label, list_of_tuples):
    """Print (item, count) pairs as a two-column table headed by `label`."""
    header = "\n{:^20} | {:^6}".format(label, "Count")
    print(header)
    print("*" * 40)
    for item, count in list_of_tuples:
        print("{:20} | {:>6}".format(item, count))
In [ ]:
# Render a formatted top-10 table for each collection.
for caption, items in (('Word', words),
                       ('Screen Name', screen_names),
                       ('Hashtag', hashtags)):
    prettyprint_counts(caption, Counter(items).most_common(10))
In [ ]:
# Collect (retweet_count, original author, flattened text) for every
# status that is itself a retweet.
retweets = []
for status in statuses:
    # Only statuses carrying a 'retweeted_status' payload are retweets.
    if 'retweeted_status' not in status:
        continue
    retweets.append((status['retweet_count'],
                     status['retweeted_status']['user']['screen_name'],
                     status['text'].replace("\n", "\\")))
We can build another prettyprint
function to print entire tweets with their retweet count.
We also want to split the text of the tweet into up to 3 lines, if needed.
In [ ]:
row_template = "{:^7} | {:^15} | {:50}"

def prettyprint_tweets(list_of_tuples):
    """Print (count, screen_name, text) tuples as a table.

    Text longer than 50 characters wraps onto continuation rows:
    characters 50-99 on a second row, anything beyond on a third.
    """
    print()
    print(row_template.format("Count", "Screen Name", "Text"))
    print("*" * 60)
    for count, screen_name, text in list_of_tuples:
        first, second, rest = text[:50], text[50:100], text[100:]
        print(row_template.format(count, screen_name, first))
        if second:
            print(row_template.format("", "", second))
        if rest:
            print(row_template.format("", "", rest))
In [ ]:
# Slice off the first 5 from the sorted results and display each item in the tuple
prettyprint_tweets(sorted(retweets, reverse=True)[:10])
In [ ]: